{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# An attempt at writing a script that can convert every specified file to LeanPub Flavoured Markdown for publication on LeanPub"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nams.builder_lib import read_mkdocs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pyprojroot import here"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mkdocs_config = read_mkdocs()\n",
    "nav = mkdocs_config[\"nav\"]\n",
    "docroot = here() / \"notebooks\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nams.builder_lib import parse_navigation\n",
    "\n",
    "# The goal here is to flatten the tree structure into a list of 2-tuples,\n",
    "# where the title is the first element and the filename is the second element.\n",
    "title_files = parse_navigation(nav, [])\n",
    "title_files.insert(0, ('Preface', '00-preface/preface.md'))\n",
    "title_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nams.builder_lib import exclude\n",
    "\n",
    "exclusion = [\n",
    "    \"Welcome\", \n",
    "    \"Get Setup\",\n",
    "    \"Prerequisites\",\n",
    "    \"Further Learning\",\n",
    "    \"Style Guide\",\n",
    "]\n",
    "\n",
    "title_files = exclude(title_files, titles=exclusion)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "title_files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We now need to convert each of the files into Markua."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "strings = [\"df.head()\", \"another_thing\\ndf2.head()\"]\n",
    "\n",
    "\n",
    "def replace_dataframe_with_markdown(s: str):\n",
    "    \"\"\"\n",
    "    Wrap selected expression lines in ``print(<line>.to_markdown())``.\n",
    "\n",
    "    A line qualifies when it ends with one of the suffixes listed below.\n",
    "    Leading indentation is kept outside the ``print`` call so that a\n",
    "    rewritten line inside an indented block remains valid Python.\n",
    "\n",
    "    :param s: Source text of a notebook cell.\n",
    "    :returns: The rewritten source; every line, including the last one,\n",
    "        is terminated with a newline.\n",
    "    \"\"\"\n",
    "    # Line endings that mark an expression to be printed via .to_markdown().\n",
    "    dataframe_suffixes = (\n",
    "        \".head()\",\n",
    "        \".describe()\",\n",
    "        \"correlation_centrality(graphs[0])\",\n",
    "        \"find_connected_persons(G, 'p2', 'c10')\",\n",
    "    )\n",
    "    new_string = \"\"\n",
    "    for line in s.split(\"\\n\"):\n",
    "        if line.endswith(dataframe_suffixes):\n",
    "            expression = line.lstrip()\n",
    "            indent = line[: len(line) - len(expression)]\n",
    "            line = f\"{indent}print({expression}.to_markdown())\"\n",
    "        new_string += line + \"\\n\"\n",
    "    return new_string\n",
    "\n",
    "# replace_dataframe_with_markdown(strings[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_render_html_with_raw(s: str):\n",
    "    \"\"\"\n",
    "    Unwrap ``render_html(...)`` calls so only their argument remains.\n",
    "\n",
    "    For each line that starts with ``render_html``, that name is removed\n",
    "    and the first and last remaining characters are dropped.\n",
    "\n",
    "    :param s: Source text of a notebook cell.\n",
    "    :returns: The rewritten source with a newline after every line.\n",
    "    \"\"\"\n",
    "    rewritten = []\n",
    "    for row in s.split(\"\\n\"):\n",
    "        if row.startswith(\"render_html\"):\n",
    "            row = row.replace(\"render_html\", \"\")[1:-1]\n",
    "        rewritten.append(row + \"\\n\")\n",
    "    return \"\".join(rewritten)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_admonition(src: str):\n",
    "    \"\"\"\n",
    "    Rewrite a ``???`` admonition as plain text introduced by ``*Note:*``.\n",
    "\n",
    "    Text that does not start with ``???`` is returned unchanged. Otherwise\n",
    "    the ``???`` header line becomes ``*Note:*`` and one level of four-space\n",
    "    indentation is removed from each remaining line. Only the leading\n",
    "    indent is stripped, so runs of spaces inside a line and deeper\n",
    "    indentation levels are kept.\n",
    "\n",
    "    :param src: Text that may begin with a ``???`` admonition.\n",
    "    :returns: The rewritten, newline-terminated text, or ``src`` itself.\n",
    "    \"\"\"\n",
    "    if not src.startswith(\"???\"):\n",
    "        return src\n",
    "\n",
    "    indent = \" \" * 4\n",
    "    new_text = \"\"\n",
    "    for line in src.split(\"\\n\"):\n",
    "        if line.startswith(\"???\"):\n",
    "            line = \"*Note:*\"\n",
    "        elif line.startswith(indent):\n",
    "            # Drop exactly one indentation level, nothing else.\n",
    "            line = line[len(indent):]\n",
    "        new_text += line + \"\\n\"\n",
    "    return new_text\n",
    "\n",
    "text = \"\"\"??? note \"Geospatial Viz\"\n",
    "\n",
    "    As the creator of `nxviz`,\n",
    "    I would recommend using proper geospatial packages\n",
    "    to build custom geospatial graph viz,\n",
    "    such as [`pysal`](http://pysal.org/).)\n",
    "    \n",
    "    That said, `nxviz` can probably do what you need\n",
    "    for a quick-and-dirty view of the data.\n",
    "\"\"\"\n",
    "\n",
    "print(replace_admonition(text))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def replace_markdown_table_tabs(body: str):\n",
    "    \"\"\"\n",
    "    Remove the four-space indent that precedes a table pipe.\n",
    "\n",
    "    :param body: Markdown text.\n",
    "    :returns: ``body`` with each ``\"    |\"`` replaced by ``\"|\"``.\n",
    "    \"\"\"\n",
    "    indented_pipe = \"    |\"\n",
    "    pieces = body.split(indented_pipe)\n",
    "    return \"|\".join(pieces)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nbconvert.exporters import MarkdownExporter\n",
    "from nbformat.notebooknode import NotebookNode\n",
    "from nbconvert.preprocessors import ExecutePreprocessor\n",
    "from nams.builder_lib import strip_execution_count\n",
    "\n",
    "def nb2markdown(nb: NotebookNode, kernel: str):\n",
    "    \"\"\"\n",
    "    Rewrite, execute and export a notebook as Markdown.\n",
    "\n",
    "    Code cells are rewritten before execution: the animation call is\n",
    "    commented out, selected expressions are printed via ``.to_markdown()``\n",
    "    and ``render_html`` calls are unwrapped. ``???`` admonitions in\n",
    "    markdown cells are turned into plain notes. Other cell types are left\n",
    "    untouched. The notebook is then executed and exported.\n",
    "\n",
    "    :param nb: The notebook object to convert; its cells are modified\n",
    "        in place.\n",
    "    :param kernel: Name of the kernel used to execute the notebook.\n",
    "    :returns: The ``(body, resources)`` pair from ``MarkdownExporter``.\n",
    "    \"\"\"\n",
    "    animation_call = \"HTML(anim(G2, msg, n_frames=4).to_html5_video())\"\n",
    "\n",
    "    # Rewrite the sources before execution so that the executed outputs\n",
    "    # already have the form wanted in the exported Markdown.\n",
    "    for cell in nb[\"cells\"]:\n",
    "        src = cell[\"source\"]\n",
    "        if cell[\"cell_type\"] == \"code\":\n",
    "            # The animation call is disabled for this build.\n",
    "            src = src.replace(animation_call, \"# \" + animation_call)\n",
    "            src = replace_dataframe_with_markdown(src)\n",
    "            src = replace_render_html_with_raw(src)\n",
    "        elif cell[\"cell_type\"] == \"markdown\":\n",
    "            src = replace_admonition(src)\n",
    "        cell[\"source\"] = src\n",
    "\n",
    "    ep = ExecutePreprocessor(timeout=600, kernel_name=kernel)\n",
    "    ep.preprocess(nb)\n",
    "\n",
    "    strip_execution_count(nb)\n",
    "    markdown_exporter = MarkdownExporter()\n",
    "    body, resources = markdown_exporter.from_notebook_node(nb)\n",
    "    return body, resources\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from nams.builder_lib import read_notebook"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "sample_chapters = [\"Preface\", \"Learning Goals\", \"Introduction to Graphs\", \"The NetworkX API\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Now, convert everything into plain text markdown."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "from pyprojroot import here\n",
    "\n",
    "build_dir = here() / \"manuscript\"\n",
    "build_dir.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "images_dir = build_dir / \"images\"\n",
    "images_dir.mkdir(parents=True, exist_ok=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def nth_repl_all(string: str, substring: str, replacement: str, nth: int) -> str:\n",
    "    \"\"\"\n",
    "    Replace every ``nth`` occurrence of ``substring`` with ``replacement``.\n",
    "\n",
    "    Occurrences are counted left to right without overlapping: with\n",
    "    ``nth=1`` every occurrence is replaced, with ``nth=2`` every second\n",
    "    one, and so on.\n",
    "\n",
    "    :param string: Text to search.\n",
    "    :param substring: Text to look for; an empty value leaves ``string``\n",
    "        unchanged.\n",
    "    :param replacement: Text written in place of each selected occurrence.\n",
    "    :param nth: Replace the nth, 2*nth, 3*nth, ... occurrence.\n",
    "    :returns: The text with the selected occurrences replaced.\n",
    "    \"\"\"\n",
    "    # An empty substring matches at every position and would never terminate.\n",
    "    if not substring:\n",
    "        return string\n",
    "\n",
    "    find = string.find(substring)\n",
    "    # Occurrences seen since the last replacement (1-based).\n",
    "    i = 1\n",
    "    while find != -1:\n",
    "        if i == nth:\n",
    "            string = string[:find] + replacement + string[find + len(substring):]\n",
    "            i = 0\n",
    "            # Resume right after the inserted text so it is never rescanned.\n",
    "            resume = find + len(replacement)\n",
    "        else:\n",
    "            # Resume right after this match so an adjacent one is not skipped.\n",
    "            resume = find + len(substring)\n",
    "        find = string.find(substring, resume)\n",
    "        i += 1\n",
    "    return string\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mdlatex2lfmlatex(text):\n",
    "    \"\"\"\n",
    "    Rewrite Markdown ``$`` / ``$$`` delimiters as ``{$$}`` / ``{/$$}``.\n",
    "\n",
    "    The passes run in order: each ``$$`` and then each remaining ``$``\n",
    "    becomes the placeholder ``{@@}``, every placeholder becomes ``{$$}``,\n",
    "    and finally every second ``{$$}`` becomes ``{/$$}``.\n",
    "\n",
    "    :param text: Markdown text.\n",
    "    :returns: The text with the delimiters rewritten.\n",
    "    \"\"\"\n",
    "    # (substring, replacement, nth) for each pass, applied top to bottom.\n",
    "    passes = (\n",
    "        (\"$$\", \"{@@}\", 1),\n",
    "        (\"$\", \"{@@}\", 1),\n",
    "        (\"{@@}\", \"{$$}\", 1),\n",
    "        (\"{$$}\", \"{/$$}\", 2),\n",
    "    )\n",
    "    for old, new, every in passes:\n",
    "        text = nth_repl_all(text, substring=old, replacement=new, nth=every)\n",
    "    return text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "\n",
    "\n",
    "logger = logging.getLogger()\n",
    "logger.setLevel(logging.INFO)\n",
    "\n",
    "book_txt = \"\"\n",
    "files_to_validate = []\n",
    "\n",
    "\n",
    "\n",
    "for chapter, fpath_str in title_files:\n",
    "    logging.info(f\"Processing chapter {chapter}\")\n",
    "    fpath = Path(fpath_str)\n",
    "    source_path = docroot / fpath\n",
    "    # Handle notebooks\n",
    "    if source_path.suffix == \".ipynb\":\n",
    "        text, resources = nb2markdown(read_notebook(source_path), kernel=\"nams\")\n",
    "    # Handle markdown files\n",
    "    else:\n",
    "        # The source is only read, so it is opened read-only; this also\n",
    "        # works when the file is not writable.\n",
    "        with open(source_path, \"r\") as f:\n",
    "            text = f.read()\n",
    "        # Give markdown files the same resources shape as notebooks,\n",
    "        # with no outputs to write.\n",
    "        resources = {\"outputs\": {}}\n",
    "\n",
    "    text = f\"# {chapter}\\n\\n\" + text\n",
    "\n",
    "    if chapter in sample_chapters:\n",
    "        insert = \"{sample: true}\\n\\n\"\n",
    "        text = insert + text\n",
    "\n",
    "    # More processing: Replace all output_* with <relative_dir>_md_<autogen_numbers>\n",
    "    # as_posix() keeps \"/\" as the separator on every platform, so the\n",
    "    # prefix never contains a path separator after the replacements.\n",
    "    img_prefix = fpath.with_suffix(\".md\").as_posix().replace(\"/\", \"_\").replace(\".\", \"_\") + \"_\"\n",
    "    text = text.replace(\"output_\", \"images/\" + img_prefix)\n",
    "\n",
    "    # More processing: Leanpub Flavoured Markdown uses {$$} to delineate LaTeX.\n",
    "    text = mdlatex2lfmlatex(text)\n",
    "\n",
    "    # More preprocessing: Clean up tabs for all of the markdown tables\n",
    "    text = replace_markdown_table_tabs(text)\n",
    "\n",
    "    # Each chapter gets a directory named after its source file (with a\n",
    "    # \".md\" suffix) that contains a single index.md.\n",
    "    markdown_dir = (build_dir / fpath).with_suffix(\".md\")\n",
    "    markdown_dir.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "    # Write the text out\n",
    "    index_path = markdown_dir / \"index.md\"\n",
    "    with open(index_path, \"w\") as f:\n",
    "        f.write(text)\n",
    "    files_to_validate.append(index_path)\n",
    "\n",
    "    # Write the resources out\n",
    "    for k, v in resources[\"outputs\"].items():\n",
    "        k = k.replace(\"output_\", img_prefix)\n",
    "        logging.debug(f\"image filename = {k}\")\n",
    "        with open(images_dir / k, \"wb\") as f:\n",
    "            f.write(v)\n",
    "\n",
    "    # Book.txt lists one manuscript-relative path per line, always with \"/\".\n",
    "    book_txt = book_txt + (fpath.with_suffix(\".md\") / \"index.md\").as_posix() + \"\\n\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# hack to find all images embedded in markdown and copy it to manuscript/images\n",
    "from shutil import copyfile\n",
    "for image in docroot.glob('**/images/*.png'):\n",
    "    copyfile(image, images_dir / image.name)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(build_dir / \"Book.txt\", \"w+\") as f:\n",
    "    f.write(book_txt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def has_html(s: str) -> bool:\n",
    "    \"\"\"\n",
    "    Report whether the text contains one of the known HTML tag openers.\n",
    "\n",
    "    :param s: Text to inspect.\n",
    "    :returns: True if any tag opener occurs in ``s``, otherwise False.\n",
    "    \"\"\"\n",
    "    tag_openers = (\"<li\", \"<ul\", \"<ol\", \"<span\", \"<p\", \"<em\")\n",
    "    return any(opener in s for opener in tag_openers)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Print each line that still has a known conversion problem\n",
    "for fpath in files_to_validate:\n",
    "    # The files are only inspected here, so read-only access is enough.\n",
    "    with open(fpath, \"r\") as f:\n",
    "        for line in f:\n",
    "            problems = (\n",
    "                line.startswith(\"    |\"),  # table row still indented\n",
    "                line.startswith(\"???\"),  # admonition header left in place\n",
    "                has_html(line),  # HTML tag opener present\n",
    "                \" '|   |\" in line,  # NOTE(review): presumably a table printed as a quoted string - confirm\n",
    "            )\n",
    "            # Report an offending line once, even when several checks match.\n",
    "            if any(problems):\n",
    "                print(fpath, line)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "nams",
   "language": "python",
   "name": "nams"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
